Bergen et al., 2021 Beyond the scope of computational modeling, the statistical power of the methods depends on the curvature in the phase portrait since a lack of curvature challenges current models to distinguish whether an up- or down-regulation is occurring. The overall curvature of deviation from the steady-state line in the phase portrait is mostly impacted by the ratios of splicing to degradation rates (Box 1), indicating that statistical inference is limited to genes where splicing is faster or comparable to degradation, while a small ratio would yield straight lines rather than an interpretable curvature.
import scvelo as scv
import numpy as np
import pandas as pd
import scipy
import scanpy as sc
import os
# rpy2
os.environ['R_HOME'] = '/home/fdeckert/bin/miniconda3/envs/p.3.8.12-FD20200109SPLENO/lib/R'
sc.settings.vector_friendly = False
scv.set_figure_params(figsize=(2, 5))
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')
# Plotting
import rpy2.robjects as robjects
color_load = robjects.r.source('plotting_global.R')
color = dict()
for i in range(len(color_load[0])):
color[color_load[0].names[i]] = {key : color_load[0][i].rx2(key)[0] for key in color_load[0][i].names}
adata = sc.read_h5ad('data/object/velocyto.h5ad')
obs = pd.read_csv('data/object/adata_sct_hvg2000/meta/meta.csv', index_col=0)
obsm = pd.read_csv('data/object/adata_sct_hvg2000/reductions/X_umap_paga/reduction.csv', index_col=0)
leiden_annotation = ['MEP', 'Ery (1)', 'Ery (2)', 'Ery (3)', 'Ery (4)']
# Filter obs by Ery annotation and treatment
obs = obs[obs['leiden_annotation'].isin(leiden_annotation)]
# Filter obsm by cell index
obsm = obsm[obsm.index.isin(obs.index)]
# Filter velocity adata by obs
adata = adata[adata.obs.index.isin(obs.index)]
# Order index to match velocity adata
obs = obs.reindex(adata.obs.index)
obsm = obsm.reindex(adata.obs.index)
adata.obs = obs
adata.obsm['X_umap'] = obsm.to_numpy()
def set_color(categories):
categories = [x for x in categories if x in list(adata.obs.columns)]
for category in categories:
adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
keys = list(color[category].keys())
keys = [x for x in keys if x in list(adata.obs[category])]
adata.obs[category] = adata.obs[category].cat.reorder_categories(keys)
adata.uns[category+'_colors'] = np.array([color[category].get(key) for key in keys], dtype=object)
# Set colors
set_color(list(color.keys()))
sc.pl.umap(adata, color=['leiden', 'tissue', 'treatment', 'leiden_annotation', 'label_fine_haemosphere', 'sample_rep', 'cc_phase_class', 'pMt_RNA', 'pHb_RNA', 'pRb_RNA'], wspace=1, ncols=5)
... storing 'sample_name' as categorical ... storing 'sample_rep' as categorical ... storing 'sample_group' as categorical ... storing 'sample_path' as categorical ... storing 'sample_dir' as categorical ... storing 'label_main_immgen' as categorical ... storing 'label_fine_immgen' as categorical ... storing 'qc_class' as categorical ... storing 'doublet_cluster' as categorical ... storing 'doublet_class' as categorical ... storing 'doublet_class_int' as categorical ... storing 'leiden_res' as categorical
adata_temp = adata.copy()
def hvg_select(subset):
adata = adata_temp.copy()
adata = adata[adata.obs['treatment']==subset].copy()
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
# Reset adata
hvg = adata.var_names
return(hvg)
hvg_nacl = hvg_select('NaCl')
hvg_cpg = hvg_select('CpG')
Filtered out 26597 genes that are detected 20 counts (shared). Normalized count data: X, spliced, unspliced. Extracted 2000 highly variable genes. Logarithmized X. Filtered out 27245 genes that are detected 20 counts (shared). Normalized count data: X, spliced, unspliced. Extracted 2000 highly variable genes. Logarithmized X.
hvg = list(set(hvg_nacl) & set(hvg_cpg))
len(hvg)
1079
adata = adata_temp.copy()
scv.pp.filter_and_normalize(adata, min_shared_counts=20, subset_highly_variable=False)
adata = adata[:, hvg]
Filtered out 25883 genes that are detected 20 counts (shared). Normalized count data: X, spliced, unspliced. Logarithmized X.
scv.pp.moments(adata)
computing neighbors
finished (0:00:27) --> added
'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
finished (0:00:01) --> added
'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
scv.tl.recover_dynamics(adata)
recovering dynamics (using 1/32 cores)
finished (0:10:42) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)
computing velocities
finished (0:00:04) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph (using 1/32 cores)
finished (0:00:18) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
# del adata.obs['root_cells']
# del adata.obs['end_points']
# del adata.obs['velocity_pseudotime']
# del adata.obs['latent_time']
scv.tl.latent_time(adata, min_likelihood=0.1)
computing terminal states
identified 3 regions of root cells and 1 region of end points .
finished (0:00:03) --> added
'root_cells', root cells of Markov diffusion process (adata.obs)
'end_points', end points of Markov diffusion process (adata.obs)
computing latent time using root_cells as prior
finished (0:00:03) --> added
'latent_time', shared time (adata.obs)
scv.set_figure_params(figsize=(2, 5))
scv.pl.velocity_embedding_stream(adata, basis='X_umap', color=['leiden_annotation'])
computing velocity embedding
finished (0:00:03) --> added
'velocity_umap', embedded velocity vectors (adata.obsm)
sc.pl.umap(adata, color=['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'])
top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).dropna().index
scv.tl.differential_kinetic_test(adata, var_names=top_genes, groupby='leiden_annotation')
testing for differential kinetics
finished (0:02:52) --> added
'fit_diff_kinetics', clusters displaying differential kinetics (adata.var)
'fit_pvals_kinetics', p-values of differential kinetics (adata.var)
adata.write_h5ad('data/object/scvelo.h5ad')
... storing 'fit_diff_kinetics' as categorical
adata = sc.read_h5ad('data/object/scvelo.h5ad')
scv.tl.velocity(adata, mode='dynamical', diff_kinetics=True)
scv.tl.velocity_graph(adata)
computing velocities
finished (0:00:05) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph (using 1/32 cores)
finished (0:00:18) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
# del adata.obs['root_cells']
# del adata.obs['end_points']
# del adata.obs['velocity_pseudotime']
# del adata.obs['latent_time']
scv.tl.latent_time(adata, min_likelihood=0.1)
computing latent time using root_cells as prior
finished (0:00:02) --> added
'latent_time', shared time (adata.obs)
scv.set_figure_params(figsize=(2, 5))
scv.pl.velocity_embedding_stream(adata, basis='X_umap', color=['leiden_annotation'])
computing velocity embedding
finished (0:00:03) --> added
'velocity_umap', embedded velocity vectors (adata.obsm)
sc.pl.umap(adata, color=['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'])
adata.write_h5ad('data/object/scvelo_dkg.h5ad')
adata = sc.read_h5ad('data/object/scvelo.h5ad')
scv.tl.velocity(adata, mode='dynamical', groupby='treatment', groups=['NaCl', 'CpG'])
scv.tl.velocity_graph(adata)
computing velocities
finished (0:00:05) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph (using 1/32 cores)
finished (0:00:18) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
# del adata.obs['root_cells']
# del adata.obs['end_points']
# del adata.obs['velocity_pseudotime']
# del adata.obs['latent_time']
scv.tl.latent_time(adata, min_likelihood=0.1)
computing latent time using root_cells as prior
finished (0:00:03) --> added
'latent_time', shared time (adata.obs)
scv.set_figure_params(figsize=(2, 5))
scv.pl.velocity_embedding_stream(adata, basis='X_umap', color=['leiden_annotation'])
computing velocity embedding
finished (0:00:03) --> added
'velocity_umap', embedded velocity vectors (adata.obsm)
sc.pl.umap(adata, color=['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'])
adata.write_h5ad('data/object/scvelo_grp.h5ad')
adata.obs[['latent_time']].to_csv('result/scvelo/latent_time_grp.csv')
adata = sc.read_h5ad('data/object/scvelo.h5ad')
scv.tl.velocity(adata, mode='dynamical', diff_kinetics=True, groupby='treatment', groups=['NaCl', 'CpG'])
scv.tl.velocity_graph(adata)
computing velocities
finished (0:00:05) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph (using 1/32 cores)
finished (0:00:18) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
# del adata.obs['root_cells']
# del adata.obs['end_points']
# del adata.obs['velocity_pseudotime']
# del adata.obs['latent_time']
scv.tl.latent_time(adata, min_likelihood=0.1)
computing latent time using root_cells as prior
finished (0:00:02) --> added
'latent_time', shared time (adata.obs)
scv.set_figure_params(figsize=(2, 5))
scv.pl.velocity_embedding_stream(adata, basis='X_umap', color=['leiden_annotation'])
computing velocity embedding
finished (0:00:03) --> added
'velocity_umap', embedded velocity vectors (adata.obsm)
sc.pl.umap(adata, color=['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'])
adata.write_h5ad('data/object/scvelo_grp_dkg.h5ad')